extensions/sse2-float: add gamma and premul as single step
author Øyvind Kolås <pippin@gimp.org>
Thu, 15 Dec 2016 01:41:32 +0000 (02:41 +0100)
committer Øyvind Kolås <pippin@gimp.org>
Thu, 15 Dec 2016 01:41:32 +0000 (02:41 +0100)
extensions/sse2-float.c

index 72463eee929ebe42b4b183d3200452866c98d178..71fa008f2104758dd590385b99511748b3d5c728 100644 (file)
@@ -296,7 +296,7 @@ gamma_2_2_to_linear_sse2 (__v4sf x)
 }
 
 #define GAMMA_RGBA(func, munge) \
-static long \
+static inline long \
 func (const float *src, float *dst, long samples)\
 {\
   int i = samples;\
@@ -353,6 +353,14 @@ func (const float *src, float *dst, long samples)\
 GAMMA_RGBA(conv_rgbaF_linear_rgbaF_gamma, linear_to_gamma_2_2_sse2)
 GAMMA_RGBA(conv_rgbaF_gamma_rgbaF_linear, gamma_2_2_to_linear_sse2)
 
+/* Gamma-encode and premultiply RGBA float pixels in one conversion.
+ *
+ * Chains conv_rgbaF_linear_rgbaF_gamma and conv_rgbaF_linear_rgbAF_linear
+ * through an intermediate buffer.
+ *
+ * src:     input pixels, 4 floats per pixel
+ * dst:     output pixels, 4 floats per pixel
+ * samples: number of pixels to convert
+ *
+ * Returns samples.
+ *
+ * The intermediate buffer is a fixed-size stack array rather than an
+ * alloca() sized by `samples`: the pixel count is chosen by the caller and
+ * an unbounded alloca() overflows the stack on large buffers, with no way
+ * to detect the failure.
+ */
+static long conv_rgbaF_linear_rgbAF_gamma (const float *src, float *dst, long samples)
+{
+  /* NOTE(review): the block size is assumed to be a multiple of the pixel
+   * stride of both helpers' vector loops, so that splitting the run does
+   * not change which pixels take the vector path -- confirm against the
+   * helper bodies.
+   */
+  enum { BLOCK_PIXELS = 512 };
+  /* 16-byte alignment, as the SSE vector type used in this file requires. */
+  float tmp[BLOCK_PIXELS * 4] __attribute__ ((aligned (16)));
+  long  done;
+
+  for (done = 0; done < samples; done += BLOCK_PIXELS)
+    {
+      long n = samples - done;
+
+      if (n > BLOCK_PIXELS)
+        n = BLOCK_PIXELS;
+
+      conv_rgbaF_linear_rgbaF_gamma (src + done * 4, tmp, n);
+      conv_rgbaF_linear_rgbAF_linear (tmp, dst + done * 4, n);
+    }
+
+  return samples;
+}
+
 #define YA_APPLY(load, store, convert) \
 { \
   __v4sf yyaa0, yyaa1; \
@@ -637,6 +645,12 @@ init (void)
                           conv_rgbaF_linear_rgbAF_linear,
                           NULL);
                           
+      babl_conversion_new(rgbaF_linear, 
+                          rgbAF_gamma,
+                          "linear",
+                          conv_rgbaF_linear_rgbAF_gamma,
+                          NULL);
+
       /* Which of these is faster varies by CPU, and the difference
        * is big enough that it's worthwhile to include both and
        * let them fight it out in the babl benchmarks.